---
title: '"How Does Official Secrecy Shape the Study of U.S. Foreign Relations?" replication for graphs and tables'
author: "Yeseul Byeon"
date: "2024-07-29"
output: html_document
---

```{r setup, include=FALSE}
# Set the default chunk option echo = TRUE for every chunk in this document.
knitr::opts_chunk$set(echo = TRUE)
```


```{r}
library(tidyverse)
library(ggplot2)
library(stringi)

#libs for piecing together density graphs
library(gtable)
#library(patchwork)
```

```{r}
# FRUS-citing vs. all Diplomatic History (DH) articles per publication year.
# NOTE(review): this chunk was headed "Fig 8", but the plot object and the
# saved file are both named fig1 -- confirm the intended figure number.

library(scales)

# Read both inputs up front so every table exists before it is used.
dh_all_articles <- read_csv("dh_allarticles_df_091520.csv")
success <- read_csv("success_2021_jun2.csv")

# pub_year is split on a space into pub_mon / pub_year (presumably it holds
# "<month> <year>"); only rows with wc > 1999, final_fn > 9 and
# b_review == 0 are kept.
dh_filt_articles <- dh_all_articles %>%
  separate(pub_year, sep = " ", into = c("pub_mon", "pub_year"), remove = TRUE) %>%
  filter(wc > 1999) %>%
  filter(final_fn > 9) %>%
  filter(b_review == 0)
dh_filt_articles$pub_year <- as.integer(dh_filt_articles$pub_year)

# One row per article title in the citation table, then articles per year.
success_hist <- success %>%
  group_by(`Article Title`) %>%
  summarize(pub_year = first(`Article Year`))

success_hist_plot <- success_hist %>%
  group_by(pub_year) %>%
  summarize(success_n = n())

# Attach the FRUS-citing counts by year rather than by row position, so the
# two tables cannot be silently misaligned; years with no FRUS-citing article
# get a count of 0.
dh_filt_articles_plot <- dh_filt_articles %>%
  group_by(pub_year) %>%
  summarize(n = n()) %>%
  left_join(
    success_hist_plot %>% mutate(pub_year = as.integer(pub_year)),
    by = "pub_year"
  ) %>%
  mutate(
    success_n = replace_na(success_n, 0L),
    # Share of FRUS-citing articles, formatted as a whole-number percentage.
    pct = label_percent(accuracy = 1L)(success_n / n)
  )

labels <- dh_filt_articles_plot

legend <- c(
  "Articles that cite FRUS (1+ citations)" = "#66A61E",
  "Articles that do not cite FRUS" = "#666666"
)

# The grey bars show all filtered articles per year; the green FRUS-citing
# bars are drawn on top of them in a second layer.
fig1 <- ggplot() +
  geom_col(
    dh_filt_articles_plot,
    mapping = aes(x = pub_year, y = n, fill = "Articles that do not cite FRUS"),
    width = 0.8,
    position = position_dodge(1.2)
  ) +
  geom_col(
    success_hist_plot,
    mapping = aes(x = pub_year, y = success_n, fill = "Articles that cite FRUS (1+ citations)"),
    width = 0.8,
    position = position_dodge(1.2)
  ) +
  labs(x = "Published Year", y = "No. of Articles") +
  theme_minimal() +
  scale_fill_manual(name = " ", values = legend) +
  theme(legend.position = "top")

ggsave("fig1_dh_articles_jun10.jpg", fig1, width = 7, height = 4)


```


```{r}
# Fig 2: stacked density graphs of the years mentioned in articles,
# comparing AHR, JAH and Diplomatic History.

# Year-mention tables for each journal (weighted version for density graphs).
sep19_dh_years <- read_csv("1000rowForEachYearMentionAllDHDatapoints09172020.csv")
sep19_ahr_years <- read_csv("1000rowForEachYearMentionAllAHRDatapoints09172020.csv")
sep19_jah_years <- read_csv("1000rowForEachYearMentionAllJAHDatapoints09172020.csv")

apr21_dh_years <- read_csv("1000rowForEachYearMentionAllDHDatapoints04192021.csv")

# Keep mentions of years after 1491 that precede the publication year.
sep19_dh_filt <- filter(sep19_dh_years, yearMention > 1491, yearMention < pubYear)
sep19_jah_filt_density <- filter(sep19_jah_years, yearMention > 1491, yearMention < pubYear)
sep19_ahr_filt_density <- filter(sep19_ahr_years, yearMention > 1491, yearMention < pubYear)

# Expand each table so that every row is repeated `count` times.
dh_graphing <- as.data.frame(lapply(sep19_dh_filt, rep, sep19_dh_filt$count))
glimpse(dh_graphing)

jah_graphing <- as.data.frame(lapply(sep19_jah_filt_density, rep, sep19_jah_filt_density$count))

ahr_graphing <- as.data.frame(lapply(sep19_ahr_filt_density, rep, sep19_ahr_filt_density$count))

# Articles published after 2005, tagged with their journal. The inputs are
# already restricted to 1491 < yearMention < pubYear, so that filter is not
# repeated here.
dh_filt_violin <- sep19_dh_filt %>%
  filter(pubYear > 2005) %>%
  mutate(Journal = "Diplomatic History")
jah_filt_violin <- sep19_jah_filt_density %>%
  filter(pubYear > 2005) %>%
  mutate(Journal = "JAH")
ahr_filt_violin <- sep19_ahr_filt_density %>%
  filter(pubYear > 2005) %>%
  mutate(Journal = "AHR")

# Stack the three journals; the factor levels fix the facet order.
violin <- bind_rows(ahr_filt_violin, jah_filt_violin, dh_filt_violin) %>%
  mutate(Journal = factor(Journal, levels = c("AHR", "JAH", "Diplomatic History")))

ggplot(data = violin, mapping = aes(x = yearMention, fill = Journal)) +
  geom_density(alpha = 0.6) +
  facet_grid(Journal ~ .) +
  theme_bw() +
  labs(x = "Year (mentioned in article)", y = "Density")




```

```{r}
# Fig 3: stacked density graphs for DH, by publication window.
# Adapted from code by Kieran Healy at https://socviz.co

# Labels of the eleven four-year publication windows, in display order.
decade_levels <- c(
  "77-80", "81-84", "85-88", "89-92", "93-96", "97-00",
  "01-04", "05-08", "09-12", "13-16", "17-20"
)

# Lookup from publication year (1977-2020) to its window label; each label
# covers four consecutive years.
decade_by_year <- setNames(rep(decade_levels, each = 4), 1977:2020)

# Read the ni/N dataset for years in DH research articles, keep mentions
# after 1860 that precede the publication year, and label each row with its
# publication window (NA when pubYear is not one of the years 1977-2020).
apr21_dh_years <- read_csv("1000rowForEachYearMentionAllDHDatapoints04192021.csv") %>%
  filter(yearMention > 1860, yearMention < pubYear) %>%
  mutate(
    pub_decade = factor(
      unname(decade_by_year[as.character(pubYear)]),
      levels = decade_levels
    )
  )

# Median and quartiles of the mentioned year within each window.
markers <- apr21_dh_years %>%
  dplyr::group_by(pub_decade) %>%
  dplyr::summarize(
    xbar = median(yearMention, na.rm = TRUE),
    lower = quantile(yearMention, probs = .25, na.rm = TRUE),
    upper = quantile(yearMention, probs = .75, na.rm = TRUE),
    y = 0.3
  )

#p_4yr <- ggplot(data = apr21_dh_years, mapping = aes(x = yearMention)) + geom_density(fill = "lightblue", color = FALSE, alpha = 0.6) + geom_text(data = markers, aes(x = xbar, y = 0.02, label = xbar), nudge_x = 2.5, size = 1.5) + geom_vline(data = markers, aes(xintercept = xbar), color = "black", size = 0.3) + geom_vline(data = markers, aes(xintercept = lower), color = "red", size = 0.3) + geom_vline(data = markers, aes(xintercept = upper), color = "red", size = 0.3) + geom_text(data = markers, aes(x = lower, y = 0.02, label = lower), nudge_x = 2.5, size = 1.5, color = "red") + geom_text(data = markers, aes(x = upper, y = 0.02, label = upper), nudge_x = 2.5, size = 1.5, color = "red") + facet_grid(pub_decade ~., switch = "y") + theme_minimal() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), axis.text.y = element_blank(), strip.text.y = element_text(angle = 180, size = 6)) + labs(x = "Year (mentioned in article)", y = "Density (Scaled 0-0.07)") + ylim(0,0.07)
#p_4yr

```

```{r}
# Individual density graphs, one per publication window. All eleven panels
# are built by one helper so they share identical layers, theme and axes.

# Build the density panel for a single publication window.
#
# decade       Window label to plot (a value of `pub_decade`, e.g. "77-80").
# xbar_nudge   Horizontal offset of the median label.
# lower_nudge  Horizontal offset of the lower-quartile label.
# upper_nudge  Horizontal offset of the upper-quartile label.
# years        Data with `yearMention` and `pub_decade` columns.
# marks        Per-window summary with `pub_decade`, `xbar`, `lower`, `upper`.
#
# Returns a ggplot object.
plot_decade_density <- function(decade,
                                xbar_nudge = 4,
                                lower_nudge = -4,
                                upper_nudge = 4,
                                years = apr21_dh_years,
                                marks = markers) {
  panel_years <- years[which(years$pub_decade == decade), ]
  panel_marks <- marks[which(marks$pub_decade == decade), ]

  # The marker segments stop at the top of the y scale. A yend beyond the
  # limits set by ylim() is censored to NA by the scale's default
  # out-of-bounds handling, and the segment is then dropped.
  y_top <- 0.07

  ggplot(data = panel_years, mapping = aes(x = yearMention)) +
    # color = NA is the documented way to draw the density with no outline.
    geom_density(fill = "lightblue", color = NA, alpha = 0.6) +
    geom_text(
      data = panel_marks,
      aes(x = xbar, y = 0.005, label = xbar),
      nudge_x = xbar_nudge, size = 8
    ) +
    geom_segment(
      data = panel_marks,
      aes(x = xbar, xend = xbar, y = 0, yend = y_top),
      color = "black", size = 0.5
    ) +
    geom_segment(
      data = panel_marks,
      aes(x = lower, xend = lower, y = 0, yend = y_top),
      color = "red", size = 0.5
    ) +
    geom_segment(
      data = panel_marks,
      aes(x = upper, xend = upper, y = 0, yend = y_top),
      color = "red", size = 0.5
    ) +
    geom_text(
      data = panel_marks,
      aes(x = lower, y = 0.005, label = lower),
      nudge_x = lower_nudge, size = 8, color = "red"
    ) +
    geom_text(
      data = panel_marks,
      aes(x = upper, y = 0.005, label = upper),
      nudge_x = upper_nudge, size = 8, color = "red"
    ) +
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      axis.text.y = element_blank(),
      axis.title = element_blank(),
      axis.text.x = element_text(size = 24)
    ) +
    ylim(0, y_top) +
    xlim(1860, 2020)
}

# The first three windows place the median label to the left of its line and
# the lower-quartile label to the right; the remaining windows use the
# helper's defaults (median right, lower quartile left).
p1 <- plot_decade_density("77-80", xbar_nudge = -4, lower_nudge = 4)
p2 <- plot_decade_density("81-84", xbar_nudge = -4, lower_nudge = 4)
p3 <- plot_decade_density("85-88", xbar_nudge = -4, lower_nudge = 4)
p4 <- plot_decade_density("89-92")
p5 <- plot_decade_density("93-96")
p6 <- plot_decade_density("97-00")
p7 <- plot_decade_density("01-04")
p8 <- plot_decade_density("05-08")
p9 <- plot_decade_density("09-12")
p10 <- plot_decade_density("13-16")
p11 <- plot_decade_density("17-20")

```


```{r}
# Stack the eleven per-window density panels into a single column.
# patchwork provides the `/` stacking operator and plot_layout(); it is
# attached here because the other library(patchwork) lines in this file are
# commented out.
library(patchwork)

a2 <- p1 / p2 / p3 / p4 / p5 / p6 / p7 / p8 / p9 / p10 / p11 +
  plot_layout(ncol = 1, heights = rep(3, 11))

#ggsave("fig4_stacked_density_dh.jpg", a, width = 30, height = 44)

#ggsave("fig4_stacked_density_dh2.jpg", a2, width = 24, height = 33)
```

```{r}
# Fig 5 (by subseries): load the FRUS volume list and build its join key.

volume_list <- read_csv("frus_volumes_oct1_2020.csv")

# Display the volumes released after 1976 (printed only, not stored).
filter(volume_list, release_year > 1976)

# vol_ID pastes subseries and volume_number together with "|"; unite()
# replaces the two source columns with the combined one.
volume_list_merge <- unite(volume_list, vol_ID, c(subseries, volume_number), sep = "|")
```
```{r}
# Lag between each FRUS volume's release and the first article that cites it.

volume_list <- read_csv("frus_volumes_oct1_2020.csv")

# Display the volumes released after 1976 (printed only, not stored).
volume_list %>% filter(release_year > 1976)

# Join key: "<subseries>|<volume_number>".
volume_list_merge <- volume_list %>%
  unite(vol_ID, c(subseries, volume_number), sep = "|")
volume_list_merge

success <- read_csv("success_2021_jun2.csv", col_types = "cdccc")

# The citation table gets the same style of key from its FRUS columns.
success_merge <- success %>%
  unite(vol_ID, c(`FRUS Year`, `FRUS Volume`), sep = "|")

# Citations with the matching volume's metadata attached.
r <- left_join(success_merge, volume_list_merge, by = "vol_ID")

# One row per (article, volume) pair. Grouping on the two columns directly
# replaces pasting them with "_" and splitting again, which corrupts any
# title or volume ID that itself contains an underscore.
unique_r <- r %>%
  dplyr::group_by(title = `Article Title`, vol_ID) %>%
  dplyr::summarize(
    release = first(release_year),
    article_year = first(`Article Year`),
    .groups = "drop"
  )
unique_r

# arrange() sorts the whole table by article year, so first() inside each
# (vol_ID, release) group picks that volume's earliest citing article.
first_citation <- unique_r %>%
  filter(release > 1976) %>%
  group_by(vol_ID, release) %>%
  arrange(article_year)

first_citation_frus <- first_citation %>%
  summarize(first_cited_yr = first(article_year), .groups = "drop_last") %>%
  mutate(lag = first_cited_yr - release)

# Same computation without the release > 1976 restriction.
first_citation_all <- unique_r %>%
  group_by(vol_ID, release) %>%
  arrange(article_year)
first_citation_frus_all <- first_citation_all %>%
  summarize(first_cited_yr = first(article_year), .groups = "drop_last") %>%
  mutate(lag = first_cited_yr - release)

# Distribution of lags (printed).
first_citation_frus %>% group_by(lag) %>% summarize(n = n())
first_citation_frus$first_cited_yr <- as.integer(first_citation_frus$first_cited_yr)

# Number of (article, volume) rows per volume.
checks <- unique_r %>% group_by(vol_ID) %>% summarize(n = n())

```

```{r}

# Figure 8: Kaplan-Meier plot of the time from a FRUS volume's release to
# its first citation. Volumes with no recorded citation are treated as
# censored at `censor_year`.

library(survival) # Surv(), survfit()
# NOTE(review): autoplot() below needs a method for survfit objects, which
# ggplot2 alone does not provide; presumably ggfortify was attached in the
# author's session -- confirm and attach it here.

# Year at which never-cited volumes are censored.
censor_year <- 2020

volume_list <- read_csv("frus_volumes_oct1_2020.csv")

# Display the volumes released after 1976 (printed only, not stored).
volume_list %>% filter(release_year > 1976)

# Join key: "<subseries>|<volume_number>".
volume_list_merge <- volume_list %>%
  unite(vol_ID, c(subseries, volume_number), sep = "|")
volume_list_merge

success <- read_csv("success_2021_jun2.csv", col_types = "cdccc")

success_merge <- success %>%
  unite(vol_ID, c(`FRUS Year`, `FRUS Volume`), sep = "|")

# Citations with the matching volume's metadata attached.
r <- left_join(success_merge, volume_list_merge, by = "vol_ID")

# One row per (article, volume) pair. Grouping on the two columns directly
# replaces pasting them with "_" and splitting again, which corrupts any
# title or volume ID that itself contains an underscore.
unique_r <- r %>%
  dplyr::group_by(title = `Article Title`, vol_ID) %>%
  dplyr::summarize(
    release = first(release_year),
    article_year = first(`Article Year`),
    .groups = "drop"
  )
unique_r

# arrange() sorts the whole table by article year, so first() inside each
# (vol_ID, release) group picks that volume's earliest citing article.
first_citation <- unique_r %>%
  filter(release > 1976) %>%
  group_by(vol_ID, release) %>%
  arrange(article_year)

first_citation_frus <- first_citation %>%
  summarize(first_cited_yr = first(article_year), .groups = "drop_last") %>%
  mutate(lag = first_cited_yr - release)

first_citation_all <- unique_r %>%
  group_by(vol_ID, release) %>%
  arrange(article_year)
first_citation_frus_all <- first_citation_all %>%
  summarize(first_cited_yr = first(article_year), .groups = "drop_last") %>%
  mutate(lag = first_cited_yr - release)

# Distribution of lags (printed).
first_citation_frus %>% group_by(lag) %>% summarize(n = n())
first_citation_frus$first_cited_yr <- as.integer(first_citation_frus$first_cited_yr)

volume_list_surv <- volume_list %>%
  mutate(ID = paste0(subseries, "|", volume_number))

frus_volumes_merge <- read_csv("frus_volumes_merge_nov5_fixed.csv") %>%
  mutate(vol_ID = ID)

# status = 1 when a first citation was found (time = lag), 0 otherwise.
# For uncited volumes the censoring time must come from release_year (the
# volume table): `release` is supplied by the citation table and is NA
# whenever the join finds no citation, which made `time` NA and caused the
# time filter below to drop every censored volume.
frus_volumes_merge_surv <- frus_volumes_merge %>%
  left_join(first_citation_frus_all, by = "vol_ID") %>%
  mutate(
    time = if_else(is.na(lag), censor_year - release_year, lag),
    status = if_else(is.na(lag), 0, 1)
  )

# Analysis sample: volumes released after 1976 with a non-negative time.
frus_volumes_merge_surv2 <- frus_volumes_merge_surv %>%
  filter(release_year > 1976) %>%
  filter(time > -1)
km <- with(frus_volumes_merge_surv2, Surv(time, status))

# Survival object for the unfiltered table (printed for inspection only).
surv_object <- Surv(
  time = frus_volumes_merge_surv$time,
  event = frus_volumes_merge_surv$status
)
surv_object

#km_fit <- survfit(Surv(time, status) ~ 1, data = frus_volumes_merge_surv)
km_fit_2 <- survfit(Surv(time, status) ~ 1, data = frus_volumes_merge_surv2)

#summary(km_fit, times = c(1,5,10,20,40,100))

fig_8 <- autoplot(km_fit_2, conf.int = FALSE, yScale = "frac") +
  labs(y = "Survival Probability (% of Volumes Never Cited)", x = "Time (years)") +
  theme_minimal()

#ggsave("fig8.jpg", fig_8, width = 7, height = 5)

```




```{r}
# Fig 11e: FRUS document count by document year, from 1860 onward.
frus_doc_years <- read_csv("frus_docyears_in_vols_aug30.csv")

# Total docs_per_yr within each year, restricted to years after 1859.
frus_doc_years_plot <- frus_doc_years %>%
  filter(year > 1859) %>%
  group_by(year) %>%
  summarize(doc_count = sum(docs_per_yr))

ggplot(data = frus_doc_years_plot, mapping = aes(year, doc_count)) +
  geom_col(width = 0.7) +
  labs(x = "FRUS Document Year", y = "Document Count") +
  scale_x_continuous(breaks = seq(1860, 2000, 10)) +
  theme_bw()

```

